EDA: SF Bay Access Points

Author

Jeffrey Ding

library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.1     ✔ stringr   1.5.1
✔ ggplot2   4.0.1     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(plotly)

Attaching package: 'plotly'

The following object is masked from 'package:ggplot2':

    last_plot

The following object is masked from 'package:stats':

    filter

The following object is masked from 'package:graphics':

    layout
library(htmlwidgets)

Data Preview

access_raw <- read.csv("Shoreline-Access-Pts_v2-1-attribute-table.csv")
glimpse(access_raw)
Rows: 5,301
Columns: 61
$ Access_Point_ID                                      <int> 1477, 1477, 1538,…
$ Access_Type                                          <chr> "Marsh", "Marsh",…
$ Tidal_Wetland_Access                                 <chr> "Potentially", "P…
$ Service_Type                                         <chr> "Bike", "Drive", …
$ County_Name                                          <chr> "Napa", "Napa", "…
$ Subembayment_Name                                    <chr> "San Pablo Bay", …
$ OLU_Name                                             <chr> "Napa - Sonoma", …
$ SFBRA_Project_Name                                   <chr> "", "", "", "", "…
$ SFBRA_Project_Status                                 <chr> "", "", "", "", "…
$ SFBRA_Project_Site_Name                              <chr> "", "", "", "", "…
$ SFBRA_Zone                                           <chr> "North Bay", "Nor…
$ Shoreline_Park_Name                                  <chr> "", "", "", "", "…
$ CPAD_ID                                              <int> NA, NA, NA, NA, N…
$ TPL_ID                                               <chr> "", "", "", "", "…
$ BayTrail_SegID                                       <dbl> NA, NA, NA, NA, N…
$ WaterTrail_ID                                        <chr> "", "", "", "", "…
$ mean_Trail_Quality_Score                             <dbl> 2.18577982, 2.185…
$ Public_Transit_Stops                                 <int> 0, 0, 0, 0, 0, 0,…
$ sum_n_routes                                         <dbl> 0, 0, 0, 0, 0, 0,…
$ sum_n_arrivals                                       <dbl> 0, 0, 0, 0, 0, 0,…
$ mean_n_arrivals                                      <dbl> NA, NA, NA, NA, N…
$ sum_n_hours_in_service                               <dbl> 0, 0, 0, 0, 0, 0,…
$ mean_n_hours_in_service                              <dbl> NA, NA, NA, NA, N…
$ Workgroup_Addition                                   <chr> "", "", "", "", "…
$ SUM_Estimated_Total_Population                       <dbl> 856.71881, 29373.…
$ SUM_Estimated_Low_Income_Households                  <dbl> 6.780970, 2425.05…
$ SUM_Estimated_People_of_Color                        <dbl> 175.05609, 14613.…
$ SUM_Estimated_Total_Households                       <dbl> 77.71941, 10287.1…
$ SUM_Estimated_Average_Household_Size                 <dbl> 1.98479407, 58.44…
$ SUM_Estimated_Renter_Households                      <dbl> 5.174951, 5011.17…
$ SUM_Estimated_Children_Under_5                       <dbl> 4.9965042, 1574.2…
$ SUM_Estimated_Households_With_No_Vehicle             <dbl> 1.070679, 493.481…
$ SUM_Estimated_Households_With_Disability             <dbl> 11.955921, 2611.5…
$ SUM_Estimated_Single_Parent_Households               <dbl> 8.2085427, 1135.9…
$ SUM_Estimated_Seniors_Living_Alone                   <dbl> 0.7137863, 1157.7…
$ SUM_Estimated_Population_With_No_High_School_Diploma <dbl> 24.268735, 3318.3…
$ SUM_Estimated_Mortgage_Housing_Cost_Burden           <dbl> 5.8887371, 613.40…
$ SUM_Estimated_Limited_English_Proficiency_Households <dbl> 1.070679, 689.233…
$ SUM_Estimated_Below_200_Percent_Poverty              <dbl> 37.652228, 7521.9…
$ SUM_Estimated_Rent_Housing_Cost_Burden               <dbl> 0.000000, 1155.27…
$ SUM_Estimated_Non_Citizens                           <dbl> 48.359023, 10654.…
$ SUM_Estimated_Households_Below_50_Percent_Median_AMI <dbl> 6.780970, 2425.05…
$ SUM_Estimated_Foreign_Born                           <dbl> 243.11765, 16502.…
$ SUM_Estimated_Spanish_LEP                            <dbl> 101.214168, 10660…
$ SUM_Estimated_Chinese_LEP                            <dbl> 20.2792221, 164.0…
$ SUM_Estimated_Vietnamese_LEP                         <dbl> 4.28271791, 6.536…
$ SUM_Estimated_Tagalog_LEP                            <dbl> 14.454173, 116.73…
$ SUM_Estimated_Latino_Population                      <dbl> 52.82019, 12598.5…
$ SUM_Estimated_Black_Population                       <dbl> 2.319806e+00, 1.9…
$ SUM_Estimated_American_Indian_Population             <dbl> 4.818058, 248.034…
$ SUM_Estimated_Asian_Population                       <dbl> 101.179211, 978.1…
$ SUM_Estimated_Pacific_Islander_Population            <dbl> 0.00000, 45.23836…
$ SUM_Estimated_Other_Race_Population                  <dbl> 29.443686, 3846.5…
$ SUM_Estimated_Two_Or_More_Races_Population           <dbl> 25.160968, 3127.4…
$ SUM_Estimated_Population_Under_10                    <dbl> 28.551453, 3418.2…
$ SUM_Estimated_Population_Under_18                    <dbl> 58.35203, 6357.61…
$ SUM_Low_Vuln_Households                              <dbl> 0.00000, 3553.004…
$ SUM_Moderate_Vuln_Households                         <dbl> 43.71941, 2426.34…
$ SUM_High_Vuln_Households                             <dbl> 0.00000, 3229.590…
$ SUM_Highest_Vuln_Households                          <dbl> 0.000, 1044.253, …
$ SFBRA_Site                                           <chr> "No", "No", "No",…
paste("Number of unique access points:", length(unique(access_raw$Access_Point_ID)))
[1] "Number of unique access points: 2048"

Accessibility

1. Access Points and Transportation Modes Serviced

# Set granularity to one row per access point
transit <- access_raw %>%
    group_by(Access_Point_ID) %>%
    reframe(
        Access_Type = first(Access_Type),
        County_Name = first(County_Name),
        Subembayment_Name = first(Subembayment_Name),
        OLU_Name = first(OLU_Name),

        # Public transit information
        Public_Transit_Stops = first(Public_Transit_Stops),
        sum_n_routes = max(sum_n_routes),

        # Binarize walk/bike/drive service availability
        n_service_types = n_distinct(Service_Type),
        Walk = any(Service_Type == "Walk"),
        Bike = any(Service_Type == "Bike"),
        Drive = any(Service_Type == "Drive"),

        # Demographic information for access point's service area
        Estimated_Total_Population = first(SUM_Estimated_Total_Population),
        Estimated_People_of_Color = first(SUM_Estimated_People_of_Color),

        Estimated_Total_Households = first(SUM_Estimated_Total_Households),
        Estimated_Average_Household_Size = first(SUM_Estimated_Average_Household_Size),
        Estimated_Low_Income_Households = first(SUM_Estimated_Low_Income_Households),
        Estimated_No_Vehicle_Households = first(SUM_Estimated_Households_With_No_Vehicle)
    )

glimpse(transit)
Rows: 2,048
Columns: 17
$ Access_Point_ID                  <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12…
$ Access_Type                      <chr> "Marsh", "Shoreline Park", "Shoreline…
$ County_Name                      <chr> "San Francisco", "Contra Costa", "Con…
$ Subembayment_Name                <chr> "Central Bay", "San Pablo Bay", "San …
$ OLU_Name                         <chr> "Mission - Islais", "Pinole", "Pinole…
$ Public_Transit_Stops             <int> 4, 0, 2, 22, 64, 0, 0, 60, 2, 42, 12,…
$ sum_n_routes                     <dbl> 4, 0, 4, 22, 106, 0, 0, 88, 2, 70, 31…
$ n_service_types                  <int> 2, 2, 3, 3, 3, 1, 3, 3, 3, 3, 3, 3, 3…
$ Walk                             <lgl> FALSE, FALSE, TRUE, TRUE, TRUE, FALSE…
$ Bike                             <lgl> TRUE, TRUE, TRUE, TRUE, TRUE, FALSE, …
$ Drive                            <lgl> TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, T…
$ Estimated_Total_Population       <dbl> 85900.67675, 16305.91975, 1322.54901,…
$ Estimated_People_of_Color        <dbl> 63200.99755, 14175.69783, 1007.79507,…
$ Estimated_Total_Households       <dbl> 29925.97943, 5217.92039, 433.54155, 5…
$ Estimated_Average_Household_Size <dbl> 172.6602979, 28.3499626, 2.5572838, 3…
$ Estimated_Low_Income_Households  <dbl> 9066.676433, 1416.206890, 87.016453, …
$ Estimated_No_Vehicle_Households  <dbl> 6096.8166882, 379.9145699, 7.0176795,…
# Total counts of access points in each county
total_counts <- transit %>%
    mutate(Total = Walk | Bike | Drive) %>%
    group_by(County_Name) %>%
    reframe(
        Mode = "Total",
        n = sum(Total, na.rm = TRUE)
    )

# Construct county order by total access points
county_order <- total_counts %>%
    arrange(n) %>%
    pull(County_Name)

# Reorder total_counts
total_counts <- total_counts %>%
    mutate(County_Name = factor(County_Name, levels = county_order))


# Counts of access points in each county by transportation modes serviced
mode_counts <- transit %>%
    pivot_longer(c(Walk, Bike, Drive), names_to = "Mode", values_to = "Available") %>%
    mutate(
        County_Name = factor(County_Name, levels = county_order),
        Mode = factor(Mode, levels = c("Total", "Drive", "Bike", "Walk"))
    ) %>%
    group_by(County_Name, Mode) %>%
    reframe(n = sum(Available, na.rm = TRUE))

# Merge mode_counts and total_counts + order mode values for plotting
transport_modes <- bind_rows(mode_counts, total_counts) %>%
    mutate(Mode = factor(Mode, levels = c("Total", "Drive", "Bike", "Walk"))) %>%
    arrange(County_Name, Mode)

head(transport_modes, 12)
# A tibble: 12 × 3
   County_Name   Mode      n
   <fct>         <fct> <int>
 1 Napa          Total    83
 2 Napa          Drive    83
 3 Napa          Bike     81
 4 Napa          Walk     57
 5 Santa Clara   Total   100
 6 Santa Clara   Drive   100
 7 Santa Clara   Bike    100
 8 Santa Clara   Walk     90
 9 San Francisco Total   124
10 San Francisco Drive   124
11 San Francisco Bike    124
12 San Francisco Walk    111
# Construct plot
transport_modes_plot <- ggplot() +
    # Total access points by county (underlaid)
    geom_col(
        data = total_counts,
        aes(x = County_Name, y = n,
            text = paste0(
                "County: ", County_Name,
                "<br>Total Access Points: ", n
            )),
        alpha = 0.35, width = 0.85
    ) +
    # Access points by county + transport mode (overlaid)
    geom_col(
        data = mode_counts, 
        aes(x = County_Name, y = n, fill = Mode,
            text = paste0(
                "County: ", County_Name,
                "<br>Transport Mode: ", Mode,
                "<br>Access Points: ", n
            )),
        color = "black", position = "dodge", width = 0.8
    ) +
    scale_fill_manual(
        values = c(
        Walk = "lightgreen",
        Bike = "deepskyblue",
        Drive = "salmon",
        Total = "grey"
        )
    ) +
    coord_flip() +
    labs(
        title = "Bay Access Points by County and Supported Transportation Modes",
        x = "County", 
        y = "Number of access points"
    ) +
    theme_bw()
Warning in geom_col(data = total_counts, aes(x = County_Name, y = n, text =
paste0("County: ", : Ignoring unknown aesthetics: text
Warning in geom_col(data = mode_counts, aes(x = County_Name, y = n, fill =
Mode, : Ignoring unknown aesthetics: text
ggplotly(transport_modes_plot, tooltip = "text") %>%
    config(responsive = TRUE)

2. Access Points and Public Transit Service

transit <- transit %>%
    mutate(County_OLU = paste(County_Name, ":", OLU_Name))

county_olu_order <- transit %>%
    arrange(desc(County_OLU)) %>%
    pull(County_OLU) %>%
    unique()

transit <- transit %>%
    mutate(County_OLU = factor(County_OLU, levels = county_olu_order))

# Subset without select outlier OLUs
transit_sub_olu <- transit %>%
    filter((OLU_Name != "Mission - Islais") & (OLU_Name != "Golden Gate")) 

# Subset with select counties
transit_sub_county <- transit %>%
    filter(County_Name %in% c("Alameda", "Contra Costa", "Marin", "San Mateo"))

head(transit, 10)
# A tibble: 10 × 18
   Access_Point_ID Access_Type    County_Name   Subembayment_Name OLU_Name      
             <int> <chr>          <chr>         <chr>             <chr>         
 1               1 Marsh          San Francisco Central Bay       Mission - Isl…
 2               2 Shoreline Park Contra Costa  San Pablo Bay     Pinole        
 3               3 Shoreline Park Contra Costa  San Pablo Bay     Pinole        
 4               4 Shoreline Park San Francisco Central Bay       Yosemite - Vi…
 5               5 Shoreline Park San Francisco Central Bay       Mission - Isl…
 6               6 Water Trail    Sonoma        San Pablo Bay     Petaluma      
 7               7 Marsh          San Mateo     South Bay         Belmont - Red…
 8               8 Shoreline Park San Francisco Central Bay       Mission - Isl…
 9               9 Bay Trail      Alameda       Central Bay       San Leandro   
10              10 Shoreline Park San Francisco Central Bay       Mission - Isl…
# ℹ 13 more variables: Public_Transit_Stops <int>, sum_n_routes <dbl>,
#   n_service_types <int>, Walk <lgl>, Bike <lgl>, Drive <lgl>,
#   Estimated_Total_Population <dbl>, Estimated_People_of_Color <dbl>,
#   Estimated_Total_Households <dbl>, Estimated_Average_Household_Size <dbl>,
#   Estimated_Low_Income_Households <dbl>,
#   Estimated_No_Vehicle_Households <dbl>, County_OLU <fct>
# Construct reusable plot config
transit_access_config1 <- list(
    geom_boxplot(),
    coord_flip(),
    scale_fill_manual(
        values = c(
            "Alameda" = "#E69F00",
            "Contra Costa" = "#56B4E9",
            "Marin" = "#009E73",
            "Napa" = "#F0E442",
            "San Francisco" = "#0072B2",
            "San Mateo" = "#D55E00",
            "Santa Clara" = "#CC79A7",
            "Solano" = "#90EE90",
            "Sonoma" = "#B0E2FF"
        )
    ),
    scale_x_discrete(labels = function(x) sub("^.*\\s:\\s", "", x)),
    labs(
        title = "Public Transit Stops Near Bay Access Points by OLU",
        x = "Operational Landscape Unit (OLU)",
        y = "Number of nearby public transit stops",
        fill = "County"
    ),
    theme_bw()
)


# Construct plot for full transit dataframe
transit_access_plot1 <- ggplot(
    data = transit,
    aes(x = County_OLU, y = Public_Transit_Stops,
        fill = County_Name,
        text = paste0(
            "County: ", County_Name,
            "<br>OLU: ", OLU_Name
        ))
    ) +
    transit_access_config1

ggplotly(transit_access_plot1) %>%
  config(responsive = TRUE)
# Construct plot for OLU subset transit dataframe
transit_access_plot2 <- ggplot(
    data = transit_sub_olu,
    aes(x = County_OLU, y = Public_Transit_Stops,
        fill = County_Name,
        text = paste0(
            "County: ", County_Name,
            "<br>OLU: ", OLU_Name
        ))
    ) +
    transit_access_config1

ggplotly(transit_access_plot2) %>%
    config(responsive = TRUE) %>%
    layout(
        annotations = list(
        list(
            x = 0, y = -0.1, xref = "paper", yref = "paper",
            text = "OLUs excluded: Mission-Islais, Golden Gate",
            showarrow = FALSE,
            xanchor = "left",
            align = "left",
            font = list(size = 14)
        )
        ),
        margin = list(t = 50, b = 100)
    )
# Construct reusable plot config
transit_access_config2 <- list(
    coord_flip(),
    scale_x_discrete(labels = function(x) sub("^.*\\s:\\s", "", x)),
    labs(
        title = "Public Transit Stops Near Bay Access Points by OLU and County",
        x = NULL,
        y = "Number of nearby public transit stops"
    ),
    theme_bw()
)


# Construct plot for OLU subset transit dataframe
transit_access_plot3 <- ggplot(transit_sub_olu, aes(x = OLU_Name, y = Public_Transit_Stops)) +
    geom_boxplot(fill = "#B0E2FF") +
    facet_wrap(~ County_Name, scales = "free_y", ncol = 3) +
    transit_access_config2 +
    theme(
        panel.spacing = unit(9, "lines"),
        plot.margin = margin(20, 20, 20, 20)  # top, right, bottom, left
    )

ggplotly(transit_access_plot3) %>%
    config(responsive = TRUE) %>%
    layout(
        annotations = list(
        list(
            x = 0, y = -0.11, xref = "paper", yref = "paper",
            text = "OLUs excluded: Mission-Islais, Golden Gate",
            showarrow = FALSE,
            xanchor = "left",
            align = "left",
            font = list(size = 14)
        )
        ),
        margin = list(t = 90, b = 90)
    )
# Construct plot for county subset transit dataframe
transit_access_plot4 <- ggplot(transit_sub_county, aes(x = OLU_Name, y = Public_Transit_Stops)) +
    geom_boxplot(fill = "#B0E2FF") +
    facet_wrap(~ County_Name, scales = "free_y", ncol = 2) +
    transit_access_config2 +
    theme(
        panel.spacing = unit(15, "lines"),
        plot.margin = margin(20, 20, 20, 20)  # top, right, bottom, left
    )

ggplotly(transit_access_plot4) %>%
    config(responsive = TRUE) %>%
    layout(
        annotations = list(
        list(
            x = 0, y = -0.15, xref = "paper", yref = "paper",
            text = "Select counties only: Alameda, Contra Costa, Marin, San Mateo",
            showarrow = FALSE,
            xanchor = "left",
            align = "left",
            font = list(size = 14)
        )
        ),
        margin = list(t = 90, b = 90)
    )